Mean Atmospheric Temperature#
https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/doc/GHCND_documentation.pdf
import warnings
warnings.filterwarnings("ignore")
import os
import sys
import folium
import numpy as np
sys.path.append("../../../../indicators_setup")
from ind_setup.plotting_int import plot_timeseries_interactive
from ind_setup.plotting import plot_bar_probs, fontsize
from ind_setup.colors import get_df_col
sys.path.append("../../../functions")
from data_downloaders import GHCN
Define location and variables of interest#
# Country whose GHCN stations are analysed in this notebook.
country = 'Palau'
# Variable codes requested from the GHCN downloader for every station
# (daily minimum / maximum temperature per the GHCN-Daily documentation).
vars_interest = ['TMIN', 'TMAX']
Get Data#
# Look up the GHCN entry for the selected country; the first value of its
# 'Code' column is the country code reported below.
df_country = GHCN.get_country_code(country)
print(f'The GHCN code for {country} is {df_country["Code"].values[0]}')
The GHCN code for Palau is PS
# Fetch the station metadata table, then keep only the stations whose ID
# starts with the country code found above.
df_stations = GHCN.download_stations_info()
df_country_stations = df_stations[df_stations['ID'].str.startswith(df_country.Code.values[0])]
print(f'There are {df_country_stations.shape[0]} stations in {country}')
There are 13 stations in Palau
# Base URL handed to the downloader for the per-station GHCN-Daily files.
GHCND_dir = 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/'

# Extract the data of every variable of interest. The first returned item is
# published as a module-level name "dict_<VAR>" (e.g. dict_TMIN) so that later
# cells can look it up through globals().
# NOTE(review): IDS is rebound on every pass, so after the loop it holds the
# value returned for the last entry of vars_interest only.
for var in vars_interest:
    globals()[f"dict_{var}"], IDS = GHCN.extract_dict_data_var(
        GHCND_dir, var, df_country_stations
    )
Plot Data#
# Interactive map centred 0.25 degrees of latitude below the first station of
# the country table. Named `station_map` so the builtin `map` is not shadowed
# for the rest of the notebook.
station_map = folium.Map(
    location=[df_country_stations.iloc[0].Latitude-.25, df_country_stations.iloc[0].Longitude],
    zoom_start=10,
)

# Color list
colors = get_df_col()

# Add markers: one per station whose ID appears in IDS.
ids_with_data = df_country_stations[df_country_stations['ID'].isin(np.unique(IDS))]
for i, station in enumerate(ids_with_data.itertuples(index=False)):
    folium.Marker(
        location=[station.Latitude, station.Longitude],
        popup=station.ID + station.Name,
        icon=folium.DivIcon(
            # colors[i] gives each marker its own background colour.
            html=f'<div style="font-size: 25px; color: white; background-color: {colors[i]}; line-height: 1; width: 24px; padding: 0px;">☉</div>'
        ),
    ).add_to(station_map)

# Bare expression so the notebook renders the map as the cell output.
station_map
Make this Notebook Trusted to load map: File -> Trust Notebook
# One interactive time-series figure per variable of interest, fed with the
# "dict_<VAR>" object that the download loop stored in globals().
for var in vars_interest:
    dict_plot = globals()[f'dict_{var}']
    fig = plot_timeseries_interactive(
        dict_plot,
        trendline=False,
        ylims=[5, 40],
        figsize=(25, 12),
    )
Using Koror Station#
Analysis of how the maximum and minimum temperatures are changing over time.
Analysing the difference between these two variables shows how the daily variability (the spread between the daily maximum and minimum) is changing.
import pandas as pd

# Named `station_id` rather than `id` so the builtin is not shadowed.
station_id = 'PSW00040309'  # Koror Station

# Row mask for the chosen station, computed once and reused for both variables.
is_station = df_country_stations['ID'] == station_id

# [0][0]: first element of the first item returned by the extractor; it is
# used below as a mapping with a 'data' entry.
dict_min = GHCN.extract_dict_data_var(GHCND_dir, 'TMIN', df_country_stations.loc[is_station])[0][0]
dict_max = GHCN.extract_dict_data_var(GHCND_dir, 'TMAX', df_country_stations.loc[is_station])[0][0]

# Align both variables column-wise and keep only the rows where both exist.
st_data = pd.concat([dict_min['data'], dict_max['data']], axis=1).dropna()

# Daily range and the midpoint of the two extremes.
st_data['diff'] = st_data['TMAX'] - st_data['TMIN']
st_data['TMEAN'] = (st_data['TMAX'] + st_data['TMIN'])/2

# Aggregate to yearly means.
# NOTE(review): the 'Y' alias is deprecated in favour of 'YE' from pandas 2.2;
# switch once the minimum supported pandas version allows it.
st_data = st_data.resample('Y').mean()
Mean temperature#
# Yearly TMEAN series on axis 1, plotted with the trend line enabled.
dict_plot = [
    {'data': st_data, 'var': 'TMEAN', 'ax': 1, 'label': 'TMEAN'},
]
fig = plot_timeseries_interactive(
    dict_plot,
    trendline=True,
    figsize=(25, 12),
)
import matplotlib.pyplot as plt

# Anomaly of every yearly mean with respect to the 1961-1990 average.
st_data['TMEAN_ref'] = st_data['TMEAN'].sub(
    st_data.loc['1961':'1990', 'TMEAN'].mean()
)

plot_bar_probs(
    x=st_data.index.year,
    y=st_data['TMEAN_ref'],
    trendline=True,
    figsize=[15, 4],
)
# Trailing semicolon keeps the notebook from echoing the Text object.
plt.title('Temperature anomalies (Over and above 1961 - 1990 reference period)', fontsize=15);

# Years with the largest anomalies, warmest first.
nevents = 10
top_10 = st_data.sort_values(by='TMEAN_ref', ascending=False).iloc[:nevents]
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import acf
def get_data_metrics(st_data, var):
    """Summarise one column of a time-indexed DataFrame as a one-row table.

    Parameters
    ----------
    st_data : pandas.DataFrame
        Table whose index exposes ``.year`` (a ``DatetimeIndex``); the years
        are the x values of the trend fit.
    var : str
        Name of the column to summarise.

    Returns
    -------
    pandas.DataFrame
        A single row labelled ``var`` with the columns Mean, Median,
        Standard deviation, Maximum, Minimum, Range, 10th Percentile,
        90th Percentile, Trend and Extreme Days (>2σ), rounded to 3 decimals.

    Notes
    -----
    All statistics ignore NaN entries. ``Trend`` is the slope of a
    least-squares straight line through (year, value), i.e. change per year;
    it is NaN when fewer than two valid points exist. ``Extreme Days (>2σ)``
    counts the entries lying above mean + 2 * standard deviation.
    """
    series = st_data[var]

    mean = np.nanmean(series)
    std = np.nanstd(series)
    max_val = np.nanmax(series)
    min_val = np.nanmin(series)
    median = np.nanmedian(series)
    range_val = max_val - min_val

    # np.polyfit is not NaN-aware: a single missing value would turn the
    # slope into NaN, so fit only on the finite observations.
    years = np.asarray(st_data.index.year, dtype=float)
    values = np.asarray(series, dtype=float)
    finite = np.isfinite(values)
    if finite.sum() >= 2:
        trend = np.polyfit(years[finite], values[finite], 1)[0]
    else:
        trend = np.nan

    # Extreme events: count the exceedances. len() of the boolean mask would
    # return the total number of rows; NaN compares False and is not counted.
    threshold = mean + 2 * std
    extreme_days = int((series > threshold).sum())

    # Percentiles
    p10 = np.nanpercentile(series, 10)
    p90 = np.nanpercentile(series, 90)

    # Compile metrics
    df = pd.DataFrame({
        'Mean': mean,
        'Median': median,
        'Standard deviation': std,
        'Maximum': max_val,
        'Minimum': min_val,
        'Range': range_val,
        '10th Percentile': p10,
        '90th Percentile': p90,
        'Trend': trend,
        'Extreme Days (>2σ)': extreme_days,
    }, index=[var])
    return np.round(df, 3)
from ind_setup.tables import plot_df_table

# Summary metrics of the yearly mean temperature; the table is transposed so
# that each metric is shown on its own row.
var = 'TMEAN'
df = get_data_metrics(st_data, var)
fig = plot_df_table(df.T, figsize=(300, 400))
import matplotlib.pyplot as plt

# Anomaly bars with trend line; the returned axes are reused for the overlay.
ax = plot_bar_probs(
    x=st_data.index.year,
    y=st_data.TMEAN_ref,
    trendline=True,
    y_label='Mean Temperature',
    figsize=[15, 4],
)

# Highlight the top_10 years, coloured by their anomaly value.
im = ax.scatter(
    top_10.index.year,
    top_10.TMEAN_ref,
    c=top_10.TMEAN_ref.values,
    s=100,
    cmap='rainbow',
    label='Top 10 warmest years',
)

plt.title('Temperature anomalies (Over and above 1961 - 1990 reference period)', fontsize=15)
plt.colorbar(im).set_label('Mean Temperature', fontsize=fontsize)
Minimum temperature#
# Yearly TMIN series on axis 1, plotted with the trend line enabled.
dict_plot = [
    {'data': st_data, 'var': 'TMIN', 'ax': 1, 'label': 'TMIN'},
]
fig = plot_timeseries_interactive(
    dict_plot,
    trendline=True,
    figsize=(25, 12),
)
Maximum temperature#
# Yearly TMAX series on its own.
dict_plot = [
    {'data': st_data, 'var': 'TMAX', 'ax': 1, 'label': 'TMAX'},
]
fig = plot_timeseries_interactive(
    dict_plot,
    trendline=True,
    figsize=(25, 12),
)

# TMIN and TMAX in the same figure, assigned to axis 1 and axis 2 respectively.
dict_plot = [
    {'data': st_data, 'var': 'TMIN', 'ax': 1, 'label': 'TMIN'},
    {'data': st_data, 'var': 'TMAX', 'ax': 2, 'label': 'TMAX'},
]
fig = plot_timeseries_interactive(
    dict_plot,
    trendline=True,
    figsize=(24, 11),
)
Difference temperature#
# Yearly mean of the TMAX - TMIN difference, plotted with the trend line enabled.
dict_plot = [
    {'data': st_data, 'var': 'diff', 'ax': 1, 'label': 'Difference TMAX - TMIN'},
]
fig = plot_timeseries_interactive(
    dict_plot,
    trendline=True,
    figsize=(25, 12),
)